/* ///////////////////////////////////////////////////////////////////////// */
/*  This is part of the source of the OMAP 5912 heterogeneous dual-core      */
/*  MPEG-4 SP video decoder published in ACM Transactions on Embedded        */
/*  Computing Systems, Vol. X, Issue Y.                                      */
/* ------------------------------------------------------------------------- */
/*  The source code is released under GPL license.                           */
/*                                                                           */
/*  Copyright, 2011                                                          */
/*  Multimedia Embedded Systems Labs                                         */
/*  Dept. of Computer Science                                                */
/*  National Chiao Tung University                                           */
/*  Hsinchu, Taiwan.                                                         */
/* ------------------------------------------------------------------------- */
/*   bilinear8x8.c                                                           */
/*   MPEG-4 half-pel interpolation functions. This function is a rewrite of  */
/*   the equivalent components of xvidcore 0.9 by Peter Ross                 */
/* ///////////////////////////////////////////////////////////////////////// */

#include "metypes.h"
#include "bilinear8x8.h"
#include "mem_address.h"

/*
 * Staging buffers shared by the HW_halfpel8x8_* routines in this file.
 * Each routine fills global_src with packed source pixels, passes both
 * pointers to IMG_pix_inter_16x16(), and then reads its result back out of
 * global_dst. Both buffers are accessed as arrays of unsigned short.
 * NOTE(review): the interpolation_global_*_address values are not defined in
 * this file; presumably they come from mem_address.h and name fixed memory
 * regions - confirm there.
 */
unsigned short *global_src = (unsigned short *) interpolation_global_src_address;
unsigned short *global_dst = (unsigned short *) interpolation_global_dst_address;

/*
 * HW_halfpel8x8_h - 8x8 half-pel interpolation (horizontal, per the name).
 *
 *   dst            output block; 8 rows of 8 values are written
 *   src            input block; 9 rows of 10 values are read
 *   input_stride   distance, in elements, between consecutive src rows
 *   output_stride  distance, in elements, between consecutive dst rows
 *   rounding       forwarded unchanged to IMG_pix_inter_16x16()
 *
 * The source is packed two pixels per 16-bit word into global_src,
 * IMG_pix_inter_16x16() is invoked, and the result is unpacked from
 * global_dst. The layout of global_dst is defined by IMG_pix_inter_16x16(),
 * which is not visible in this file.
 */
void
HW_halfpel8x8_h(uint8 * dst, uint8 * src, const int input_stride,
                const int output_stride, const int rounding)
{
    int     row;
    int     pair;

    /*
     * Pack each source row into words 1..5 of a 24-word row of global_src:
     * the even pixel goes in the high byte, the odd pixel in the low byte.
     * Word 0 and words 6..23 of every row are left untouched.
     */
    for (row = 0; row < 9; row++)
    {
        const int in_base = row * input_stride;
        const int word_base = row * 24;

        for (pair = 0; pair < 5; pair++)
        {
            const int col = 2 * pair;

            global_src[word_base + 1 + pair] =
                ((src[in_base + col] << 8) | src[in_base + col + 1]);
        }
    }

    IMG_pix_inter_16x16(global_src, global_dst, 0, rounding);

    /*
     * Unpack 8 output rows; consecutive output rows are 36 words apart in
     * global_dst. Even output pixels come from the low byte of word `col`,
     * odd output pixels from the high byte of word `col + 2`.
     */
    for (row = 0; row < 8; row++)
    {
        const int out_base = row * output_stride;
        const int res_base = row * 36;

        for (pair = 0; pair < 4; pair++)
        {
            const int col = 2 * pair;

            dst[out_base + col] = (global_dst[res_base + col] & 0xFF);
            dst[out_base + col + 1] = (global_dst[res_base + col + 2] >> 8);
        }
    }
}

/*
 * HW_halfpel8x8_v - 8x8 half-pel interpolation (vertical, per the name).
 *
 *   dst            output block; 8 rows of 8 values are written
 *   src            input block; 9 rows of 10 values are read
 *   input_stride   distance, in elements, between consecutive src rows
 *   output_stride  distance, in elements, between consecutive dst rows
 *   rounding       forwarded unchanged to IMG_pix_inter_16x16()
 *
 * The source is packed two pixels per 16-bit word into global_src,
 * IMG_pix_inter_16x16() is invoked, and the result is unpacked from
 * global_dst. The layout of global_dst is defined by IMG_pix_inter_16x16(),
 * which is not visible in this file.
 */
void
HW_halfpel8x8_v(uint8 * dst, uint8 * src, const int input_stride,
                const int output_stride, const int rounding)
{
    int     row;
    int     pair;

    /*
     * Pack each source row into words 1..5 of a 24-word row of global_src:
     * the even pixel goes in the high byte, the odd pixel in the low byte.
     * Word 0 and words 6..23 of every row are left untouched.
     */
    for (row = 0; row < 9; row++)
    {
        const int in_base = row * input_stride;
        const int word_base = row * 24;

        for (pair = 0; pair < 5; pair++)
        {
            const int col = 2 * pair;

            global_src[word_base + 1 + pair] =
                ((src[in_base + col] << 8) | src[in_base + col + 1]);
        }
    }

    IMG_pix_inter_16x16(global_src, global_dst, 0, rounding);

    /*
     * Unpack 8 output rows; consecutive output rows are 36 words apart in
     * global_dst and the data read here starts 18 + 1 words into each of
     * them. Both pixels of a pair come from the same word: the even pixel
     * from its high byte, the odd pixel from its low byte.
     */
    for (row = 0; row < 8; row++)
    {
        const int out_base = row * output_stride;
        const int res_base = row * 36 + 18 + 1;

        for (pair = 0; pair < 4; pair++)
        {
            const int col = 2 * pair;

            dst[out_base + col] = (global_dst[res_base + col] >> 8);
            dst[out_base + col + 1] = (global_dst[res_base + col] & 0xFF);
        }
    }
}

/*
 * HW_halfpel8x8_hv - 8x8 half-pel interpolation (horizontal + vertical, per
 * the name).
 *
 *   dst            output block; 8 rows of 8 values are written
 *   src            input block; 9 rows of 10 values are read
 *   input_stride   distance, in elements, between consecutive src rows
 *   output_stride  distance, in elements, between consecutive dst rows
 *   rounding       forwarded unchanged to IMG_pix_inter_16x16()
 *
 * The source is packed two pixels per 16-bit word into global_src,
 * IMG_pix_inter_16x16() is invoked, and the result is unpacked from
 * global_dst. The layout of global_dst is defined by IMG_pix_inter_16x16(),
 * which is not visible in this file.
 */
void
HW_halfpel8x8_hv(uint8 * dst, uint8 * src, const int input_stride,
                 const int output_stride, const int rounding)
{
    int     row;
    int     pair;

    /*
     * Pack each source row into words 1..5 of a 24-word row of global_src:
     * the even pixel goes in the high byte, the odd pixel in the low byte.
     * Word 0 and words 6..23 of every row are left untouched.
     */
    for (row = 0; row < 9; row++)
    {
        const int in_base = row * input_stride;
        const int word_base = row * 24;

        for (pair = 0; pair < 5; pair++)
        {
            const int col = 2 * pair;

            global_src[word_base + 1 + pair] =
                ((src[in_base + col] << 8) | src[in_base + col + 1]);
        }
    }

    IMG_pix_inter_16x16(global_src, global_dst, 0, rounding);

    /*
     * Unpack 8 output rows; consecutive output rows are 36 words apart in
     * global_dst and the data read here starts 18 words into each of them.
     * Even output pixels come from the low byte of word `col`, odd output
     * pixels from the high byte of word `col + 2`.
     */
    for (row = 0; row < 8; row++)
    {
        const int out_base = row * output_stride;
        const int res_base = row * 36 + 18;

        for (pair = 0; pair < 4; pair++)
        {
            const int col = 2 * pair;

            dst[out_base + col] = (global_dst[res_base + col] & 0xFF);
            dst[out_base + col + 1] = (global_dst[res_base + col + 2] >> 8);
        }
    }
}
